library(sf)
## Linking to GEOS 3.8.1, GDAL 3.2.1, PROJ 7.2.1
library(stringr)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.3 ✓ purrr 0.3.4
## ✓ tibble 3.1.0 ✓ dplyr 1.0.5
## ✓ tidyr 1.1.3 ✓ forcats 0.5.1
## ✓ readr 1.4.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(readxl)
library(magrittr)
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
##
## set_names
## The following object is masked from 'package:tidyr':
##
## extract
library(huxtable)
##
## Attaching package: 'huxtable'
## The following object is masked from 'package:dplyr':
##
## add_rownames
## The following object is masked from 'package:ggplot2':
##
## theme_grey
library(broom)
wahlkreise_shp <- st_read(my_path_wahlkreise)
## Reading layer `Geometrie_Wahlkreise_20DBT' from data source
## `/Users/duzhiting/Documents/GitHub/R-Prediction/btw21_geometrie_wahlkreise_shp/Geometrie_Wahlkreise_20DBT.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 299 features and 4 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 280371.1 ymin: 5235856 xmax: 921120.1 ymax: 6101444
## Projected CRS: ETRS89 / UTM zone 32N
glimpse(wahlkreise_shp)
## Rows: 299
## Columns: 5
## $ WKR_NR <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14, 15, 16, 17, 18, 1…
## $ WKR_NAME <chr> "Flensburg – Schleswig", "Nordfriesland – Dithmarschen Nord"…
## $ LAND_NR <chr> "01", "01", "01", "01", "01", "01", "01", "01", "01", "01", …
## $ LAND_NAME <chr> "Schleswig-Holstein", "Schleswig-Holstein", "Schleswig-Holst…
## $ geometry <MULTIPOLYGON [m]> MULTIPOLYGON (((545529.8 60..., MULTIPOLYGON ((…
wahlkreise_shp %>%
ggplot() +
geom_sf()
wahlkreise_shp %>%
ggplot() +
geom_sf(fill = "grey40") +
theme_void()
unemp_file <- "~/Documents/Github/R-Prediction/btw21_Strukturdaten.csv"
file.exists(unemp_file)
## [1] TRUE
unemp_de_raw <- read_delim(unemp_file,
";", escape_double = FALSE,
locale = locale(decimal_mark = ",",
grouping_mark = "."),
trim_ws = TRUE,
skip = 8) # skipt the first 8 rows
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## .default = col_double(),
## Land = col_character(),
## `Wahlkreis-Nr.` = col_character(),
## `Wahlkreis-Name` = col_character(),
## `Fläche am 31.12.2019 (km²)` = col_number(),
## Fußnoten = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
unemp_names <- names(unemp_de_raw)
unemp_de <- unemp_de_raw
names(unemp_de) <- paste0("V",1:ncol(unemp_de))
The important columns are:
unemp_de <- unemp_de %>%
rename(state = V1,
area_nr = V2,
area_name = V3,
for_prop = V8,
pop_move = V11,
pop_migr_background = V19,
income = V26,
unemp = V47)
elec_results = read.csv2("~/Documents/Github/R-Prediction/kerg.csv", head = TRUE, sep="\t")
head(elec_results)
| Nr | Gebiet | Waehler_gueltige_Zweitstimmen_vorlauefig | AfD3 |
|---|---|---|---|
| 1 | Flensburg – Schleswig | 180112 | 10317 |
| 2 | Nordfriesland – Dithmarschen Nord | 145387 | 8798 |
| 3 | Steinburg – Dithmarschen Süd | 136299 | 11303 |
| 4 | Rendsburg-Eckernförde | 162060 | 10564 |
| 5 | Kiel | 155986 | 7654 |
| 6 | Plön – Neumünster | 133721 | 9741 |
glimpse(wahlkreise_shp)
## Rows: 299
## Columns: 5
## $ WKR_NR <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14, 15, 16, 17, 18, 1…
## $ WKR_NAME <chr> "Flensburg – Schleswig", "Nordfriesland – Dithmarschen Nord"…
## $ LAND_NR <chr> "01", "01", "01", "01", "01", "01", "01", "01", "01", "01", …
## $ LAND_NAME <chr> "Schleswig-Holstein", "Schleswig-Holstein", "Schleswig-Holst…
## $ geometry <MULTIPOLYGON [m]> MULTIPOLYGON (((545529.8 60..., MULTIPOLYGON ((…
afd_prop <- elec_results %>%
rename(afd_votes = AfD3,
area_nr = Nr,
area_name = Gebiet,
votes_total = Waehler_gueltige_Zweitstimmen_vorlauefig) %>%
mutate(afd_prop = afd_votes / votes_total) %>%
na.omit
glimpse(afd_prop)
## Rows: 299
## Columns: 5
## $ area_nr <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,…
## $ area_name <chr> "Flensburg – Schleswig", "Nordfriesland – Dithmarschen Nor…
## $ votes_total <int> 180112, 145387, 136299, 162060, 155986, 133721, 191512, 19…
## $ afd_votes <int> 10317, 8798, 11303, 10564, 7654, 9741, 13080, 14527, 10073…
## $ afd_prop <dbl> 0.05728103, 0.06051435, 0.08292797, 0.06518573, 0.04906851…
glimpse(unemp_de)
## Rows: 316
## Columns: 52
## $ state <chr> "Schleswig-Holstein", "Schleswig-Holstein", "Schle…
## $ area_nr <chr> "001", "002", "003", "004", "005", "006", "007", "…
## $ area_name <chr> "Flensburg – Schleswig", "Nordfriesland – Dithmars…
## $ V4 <dbl> 126, 197, 178, 163, 3, 92, 49, 95, 49, 126, 28, 11…
## $ V5 <dbl> 2124.3, 2777.9, 2000.0, 2165.4, 143.0, 1302.4, 664…
## $ V6 <dbl> 291.300, 235.000, 221.000, 252.100, 268.800, 223.0…
## $ V7 <dbl> 266.9, 218.3, 206.6, 238.2, 238.1, 203.7, 280.9, 2…
## $ for_prop <dbl> 8.4, 7.1, 6.5, 5.5, 11.4, 8.7, 11.1, 8.0, 5.7, 8.6…
## $ V9 <dbl> 137.1, 84.6, 110.5, 116.4, 1879.2, 171.2, 475.9, 2…
## $ V10 <dbl> -2.7, -5.2, -5.3, -3.8, -0.8, -4.0, -2.5, -1.9, -6…
## $ pop_move <dbl> 9.5, 8.3, 4.6, 8.6, -1.8, 5.1, 8.1, 5.7, 6.8, 8.8,…
## $ V12 <dbl> 16.6, 15.6, 16.3, 16.9, 14.9, 16.1, 17.1, 17.2, 14…
## $ V13 <dbl> 8.4, 7.7, 7.2, 7.0, 10.6, 7.2, 7.1, 6.8, 6.3, 6.6,…
## $ V14 <dbl> 12.0, 11.0, 10.5, 9.9, 17.2, 10.6, 10.8, 10.5, 9.3…
## $ V15 <dbl> 33.6, 34.0, 35.7, 35.9, 32.1, 34.6, 36.4, 36.9, 35…
## $ V16 <dbl> 17.6, 18.9, 18.2, 18.3, 14.6, 18.2, 16.5, 17.0, 20…
## $ V17 <dbl> 11.9, 12.8, 12.2, 12.0, 10.5, 13.3, 12.1, 11.6, 14…
## $ V18 <dbl> 12.4, 11.0, 11.5, 11.4, 50.8, 12.1, 22.2, 15.5, 12…
## $ pop_migr_background <dbl> 87.6, 89.0, 88.5, 88.6, 49.2, 87.9, 77.8, 84.5, 87…
## $ V20 <dbl> 6.4, 8.3, 3.8, 4.3, 3.1, 2.6, 5.0, 5.3, 5.9, 3.3, …
## $ V21 <dbl> 523.1, 588.9, 505.4, 500.1, 546.7, 505.6, 491.3, 4…
## $ V22 <dbl> 94.3, 94.6, 97.5, 99.0, 72.0, 90.7, 91.5, 96.0, 90…
## $ V23 <dbl> 49.3, 55.7, 49.2, 49.5, 39.4, 45.8, 44.9, 46.0, 51…
## $ V24 <dbl> 592.1, 620.2, 617.8, 634.0, 465.3, 588.1, 560.8, 6…
## $ V25 <dbl> 1.0, 1.2, 0.9, 1.1, 1.5, 1.1, 1.4, 1.3, 0.9, 1.2, …
## $ income <dbl> 41.0, 56.1, 40.0, 37.9, 35.3, 37.6, 42.9, 43.4, 46…
## $ V27 <dbl> 6.9, 9.0, 6.9, 6.7, 4.3, 6.4, 6.9, 7.0, 7.3, 6.6, …
## $ V28 <dbl> 4.8, 3.9, 2.9, 2.4, 6.0, 4.8, 2.2, 2.6, 3.4, 2.3, …
## $ V29 <dbl> 10.2, 10.8, 10.6, 9.5, 9.0, 10.4, 10.6, 10.2, 9.8,…
## $ V30 <dbl> 9.1, 9.6, 9.6, 9.5, 9.1, 9.8, 7.6, 8.4, 10.6, 7.9,…
## $ V31 <dbl> 17.7, 19.6, 17.8, 19.0, 15.8, 17.8, 15.4, 16.9, 20…
## $ V32 <dbl> 39.0, 44.0, 40.2, 35.7, 32.5, 35.4, 38.6, 37.0, 37…
## $ V33 <dbl> 34.2, 26.8, 32.4, 35.8, 42.5, 37.0, 38.3, 37.7, 31…
## $ V34 <dbl> 38.8, 31.7, 31.8, 36.9, 35.7, 34.1, 32.2, 36.0, 36…
## $ V35 <dbl> 93.3, 93.2, 89.9, 92.1, 90.2, 87.2, 87.2, 88.7, 93…
## $ V36 <dbl> 21358, 24354, 22292, 23410, 19718, 22081, 24708, 2…
## $ V37 <dbl> 31178, 34160, 31977, 29036, 46128, 29053, 29803, 3…
## $ V38 <dbl> 345.0, 355.5, 313.4, 291.6, 490.7, 329.6, 295.0, 3…
## $ V39 <dbl> 1.7, 2.8, 3.0, 2.6, 0.2, 1.6, 2.2, 1.1, 1.4, 1.2, …
## $ V40 <dbl> 19.3, 20.5, 27.7, 23.7, 16.4, 22.1, 29.0, 28.4, 21…
## $ V41 <dbl> 27.0, 32.1, 22.3, 22.9, 19.7, 28.7, 29.0, 29.4, 31…
## $ V42 <dbl> 15.4, 11.8, 15.1, 17.0, 24.8, 17.5, 14.5, 16.0, 12…
## $ V43 <dbl> 36.7, 32.7, 32.0, 33.9, 38.9, 30.1, 25.4, 25.1, 33…
## $ V44 <dbl> 76.8, 59.1, 70.6, 52.8, 125.2, 75.8, 67.4, 50.4, 5…
## $ V45 <dbl> 26.1, 26.1, 26.3, 29.6, 26.8, 27.7, 29.4, 29.0, 26…
## $ V46 <dbl> 28.2, 23.8, 29.3, 33.2, 35.1, 30.1, 44.4, 38.8, 27…
## $ unemp <dbl> 7.0, 6.5, 6.4, 4.8, 8.4, 6.7, 5.9, 5.0, 6.1, 5.1, …
## $ V48 <dbl> 7.7, 6.9, 6.8, 5.2, 9.2, 7.3, 6.3, 5.2, 6.5, 5.4, …
## $ V49 <dbl> 6.2, 5.9, 6.1, 4.3, 7.4, 6.1, 5.4, 4.8, 5.6, 4.7, …
## $ V50 <dbl> 5.9, 5.4, 7.0, 4.7, 5.1, 6.7, 5.3, 4.6, 5.4, 5.5, …
## $ V51 <dbl> 7.6, 7.2, 6.4, 5.2, 8.4, 7.0, 6.1, 5.5, 7.4, 5.2, …
## $ V52 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
unemp_de$area_nr <- as.integer(unemp_de$area_nr)
wahlkreise_shp %>%
left_join(unemp_de, by = c("WKR_NR" = "area_nr")) %>%
left_join(afd_prop, by = "area_name") -> chloro_data
chloro_data %>%
ggplot() +
geom_sf(aes(fill = afd_prop)) -> p1
p1
p1 + scale_fill_distiller(palette = "Spectral") +
theme_void()
chloro_data %>%
ggplot() +
geom_sf(aes(fill = unemp)) +
scale_fill_distiller(palette = "Spectral") +
theme_void() -> p2
p2
chloro_data %>%
mutate(afd_rank = percent_rank(afd_prop),
unemp_rank = percent_rank(unemp),
income_rank = percent_rank(income)) %>%
mutate(afd_income_diff = subtract(afd_rank, income_rank),
afd_unemp_diff = subtract(afd_rank, unemp_rank)) -> chloro_data
chloro_data %>%
as.data.frame %>%
select(area_name, afd_rank, afd_prop, unemp_rank, income_rank) %>%
arrange(-afd_rank) %>%
slice(1:5)
| area_name | afd_rank | afd_prop | unemp_rank | income_rank |
|---|---|---|---|---|
| Görlitz | 1 | 0.321 | 0.856 | 0.228 |
| Sächsische Schweiz-Osterzgebirge | 0.997 | 0.315 | 0.419 | 0.601 |
| Bautzen I | 0.993 | 0.314 | 0.433 | 0.255 |
| Erzgebirgskreis I | 0.99 | 0.302 | 0.362 | 0.617 |
| Mittelsachsen | 0.986 | 0.297 | 0.413 | 0.403 |
chloro_data %>%
as.data.frame %>%
select(area_name, afd_prop, unemp_rank, income_rank) %>%
arrange(-unemp_rank) %>%
slice(1:5)
| area_name | afd_prop | unemp_rank | income_rank |
|---|---|---|---|
| Gelsenkirchen | 0.126 | 1 | 0.0101 |
| Duisburg I | 0.0826 | 0.993 | 0.0134 |
| Duisburg II | 0.12 | 0.993 | 0.0134 |
| Bremen II – Bremerhaven | 0.0884 | 0.983 | 0.292 |
| Dortmund I | 0.069 | 0.983 | 0.121 |
chloro_data %>%
as.data.frame %>%
select(area_name, afd_prop, unemp_rank, income_rank) %>%
arrange(unemp_rank) %>%
slice(1:5)
| area_name | afd_prop | unemp_rank | income_rank |
|---|---|---|---|
| Donau-Ries | 0.109 | 0 | 0.651 |
| Erding – Ebersberg | 0.0726 | 0.00336 | 0.862 |
| Biberach | 0.107 | 0.00336 | 0.466 |
| Roth | 0.0846 | 0.0101 | 0.748 |
| Mittelems | 0.0512 | 0.0134 | 0.305 |
chloro_data %>%
as.data.frame %>%
select(area_name, afd_prop, unemp_rank, income_rank) %>%
arrange(income_rank) %>%
slice(c(1:5, 294:299))
| area_name | afd_prop | unemp_rank | income_rank |
|---|---|---|---|
| Helmstedt – Wolfsburg | 0.0937 | 0.48 | 0 |
| Gifhorn – Peine | 0.0963 | 0.332 | 0.00336 |
| Salzgitter – Wolfenbüttel | 0.0979 | 0.718 | 0.00671 |
| Gelsenkirchen | 0.126 | 1 | 0.0101 |
| Duisburg I | 0.0826 | 0.993 | 0.0134 |
| München-West/Mitte | 0.0424 | 0.362 | 0.973 |
| Düsseldorf I | 0.0428 | 0.819 | 0.987 |
| Düsseldorf II | 0.0598 | 0.819 | 0.987 |
| Starnberg – Landsberg am Lech | 0.0611 | 0.0772 | 0.993 |
| Bad Tölz-Wolfratshausen – Miesbach | 0.0796 | 0.057 | 0.997 |
| München-Land | 0.0526 | 0.0436 | 1 |
chloro_data %>%
ggplot() +
geom_sf(aes(fill = afd_unemp_diff)) +
scale_fill_gradient2() +
theme_void() -> p3
p3
chloro_data %>%
as.data.frame %>%
select(area_name, afd_unemp_diff, unemp, afd_prop) %>%
arrange(afd_unemp_diff) %>%
slice(c(1:5, 295:299)) %>% hux %>%
add_colnames
| area_name | afd_unemp_diff | unemp | afd_prop |
|---|---|---|---|
| area_name | afd_unemp_diff | unemp | afd_prop |
| Berlin-Friedrichshain-Kreuzberg – Prenzlauer Berg Ost | -0.903 | 10.6 | 0.0405 |
| Köln II | -0.886 | 9.8 | 0.0287 |
| Bremen I | -0.875 | 11.2 | 0.0528 |
| Essen III | -0.868 | 11.5 | 0.0549 |
| Berlin-Charlottenburg-Wilmersdorf | -0.862 | 10.6 | 0.0473 |
| Schwäbisch Hall – Hohenlohe | 0.75 | 3.6 | 0.126 |
| Rottweil – Tuttlingen | 0.754 | 3.7 | 0.132 |
| Neu-Ulm | 0.76 | 3.2 | 0.118 |
| Höxter – Gütersloh III – Lippe II | 4.9 | ||
| Paderborn | 5.9 |
chloro_data %>%
as.data.frame %>%
select(area_name, afd_unemp_diff, unemp, afd_prop) %>%
arrange(afd_unemp_diff) %>%
filter(afd_unemp_diff > -0.05, afd_unemp_diff < .05) %>%
hux %>%
add_colnames
| area_name | afd_unemp_diff | unemp | afd_prop |
|---|---|---|---|
| area_name | afd_unemp_diff | unemp | afd_prop |
| Rotenburg I – Heidekreis | -0.0444 | 5.7 | 0.0795 |
| Halle | -0.0413 | 9.5 | 0.147 |
| Groß-Gerau | -0.0337 | 6.1 | 0.087 |
| Augsburg-Stadt | -0.0299 | 6.4 | 0.0906 |
| Magdeburg | -0.0245 | 9.2 | 0.15 |
| Segeberg – Stormarn-Mitte | -0.0215 | 5 | 0.0728 |
| Herzogtum Lauenburg – Stormarn-Süd | -0.0146 | 5.1 | 0.0746 |
| Celle – Uelzen | -0.00964 | 6.4 | 0.0913 |
| Harburg | -0.00823 | 4.8 | 0.0722 |
| Ludwigshafen/Frankenthal | 0.00512 | 8.1 | 0.117 |
| Nienburg II – Schaumburg | 0.00662 | 5.9 | 0.087 |
| Coesfeld – Steinfurt II | 0.00696 | 3.4 | 0.0456 |
| St. Wendel | 0.0173 | 6.3 | 0.0923 |
| Kreuznach | 0.0177 | 6.7 | 0.0978 |
| Hochsauerlandkreis | 0.0222 | 4.8 | 0.0737 |
| Hochtaunus | 0.0225 | 5.1 | 0.0775 |
| Mecklenburgische Seenplatte I – Vorpommern-Greifswald II | 0.0264 | 9.9 | 0.228 |
| Leipzig I | 0.0393 | 8.3 | 0.154 |
| Siegen-Wittgenstein | 0.0439 | 6 | 0.0907 |
| München-Land | 0.0476 | 3.5 | 0.0526 |
| Altmark | 0.0496 | 8.7 | 0.189 |
chloro_data %>%
ggplot() +
geom_sf(aes(fill = afd_income_diff)) +
scale_fill_gradient2() +
theme_void() -> p4
p4
## More simple map
chloro_data %>%
select(afd_unemp_diff) %>%
mutate(afd_unemp_diff_3g = cut_interval(afd_unemp_diff, n = 3,
labels = c("AFD < Arbeitslosigkeit",
"AFD = Arbeitslosigkeit",
"AFD > Arbeitslosigkeit"))) %>%
ggplot() +
geom_sf(aes(fill = afd_unemp_diff_3g)) +
labs(fill) +
theme_void()
library(viridis)
## Loading required package: viridisLite
chloro_data %>%
mutate(afd_dens = afd_prop / unemp) %>%
ggplot +
geom_sf(aes(fill = afd_dens)) +
theme_void() +
scale_fill_viridis()
chloro_data %>%
select(unemp, afd_prop, income, area_name) %>%
ggplot +
aes(x = unemp, y = afd_prop) +
geom_point() +
geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 2 rows containing non-finite values (stat_smooth).
## Warning: Removed 2 rows containing missing values (geom_point).
chloro_data %>%
select(unemp, afd_prop, income, area_name) %>%
as.data.frame %T>%
summarise(cor_afd_unemp = cor(afd_prop, unemp)) %>%
do(tidy(cor.test(.$afd_prop, .$unemp)))
| estimate | statistic | p.value | parameter | conf.low | conf.high | method | alternative |
|---|---|---|---|---|---|---|---|
| 0.0356 | 0.612 | 0.541 | 295 | -0.0785 | 0.149 | Pearson's product-moment correlation | two.sided |